#Load Packages
library(nhanesA)
library(tidyverse)
library(haven)
library(dplyr)
library(ggplot2)
library(survey)
library(sf)
library(tidycensus)
library(ggspatial)
library(leaflet)
library(RColorBrewer)
library(broom)
library(tigris)
library(tableone)
library(survey)
options(tigris_use_cache = TRUE)
options(progress_enabled = FALSE)
Exploratory Study of Child Blood Lead Levels Nationally and in Philadelphia
BMIN503/EPID600 Final Project
1 Overview
This project aims to explore trends in childhood lead exposures. To understand this issue, data will be summarized and analyzed at national and local (Philadelphia) levels, utilizing the National Health and Nutrition Examination Survey (NHANES), the American Community Survey (ACS), the EPA’s Environmental Justice Indices, and OpenDataPhilly. Throughout this project, I consulted with three Penn Professors: Dr Cheryl Bettigole, the former health commissioner of Philadelphia, Dr Amin Chen, an environmental epidemiologist, and Dr John Holmes, a social epidemiologist. They provided insights on the availability and interpretation of data, important historical contexts, and key policy knowledge.
Materials for this project can be found here: GitHub Repository
2 Introduction
2.1 The Problem
Exposure to elevated lead levels during childhood is a critical environmental and public health issue. Lead is a neuro-toxin that impacts neurological and behavioral development in children. Measured most commonly via blood lead levels (BLL), there is no safe level of lead exposure in children. Even low levels of lead exposure during developmental stages can lead to lifelong adverse effects on cognitive function and attention.
A primary source of exposure for young children is deteriorating lead-based paint, which is still present in many homes despite a national ban in 1978. Children commonly ingest paint chips or paint dust. Both acute and chronic exposure to lead can be dangerous. Elevated blood lead levels continue to disproportionately affect marginalized communities, especially low-income and minority groups who often live in older, non-remediated housing. Addressing lead exposure is key to addressing health disparities nationally and in our communities.
2.2 The Policies
A number of federal, state and local policies have dramatically reduced the rates of lead exposure over the past 50 years. Understanding these policies is key to analyzing current and historical trends in the data.
National:
- 1974: Safe Drinking Water Act
- 1978: Lead based paints were banned in residences
- 1999: Lead Safe Housing Rule
- 2021: Biden-Harris Lead Pipe and Paint Action Plan
Local:
- 2011: Philadelphia Lead Paint Disclosure Law
- 2019: Expansion of the 2011 law to include all rental properties, not only those with children
2.3 This Project
Using data from the NHANES, ACS, and OpenDataPhilly, this project sought to understand the relationship between lead exposure and various demographic, socioeconomic and neighborhood characteristics.
This project takes an interdisciplinary approach to better understand lead exposure issues at national, state and local levels. The project integrates social epidemiology, environmental health, public health and policy analysis to address the complex nature of issues that contribute to lead exposure and the significant impact it can have on children.
3 Methods
The study uses four datasets to understand lead exposure in children, providing both broad and detailed overviews of the issue. A detailed description of these datasets is provided below.
After loading the necessary packages, the data is uploaded and cleaned below.
3.1 Loading Packages
3.2 Loading and Cleaning Data
NHANES: NHANES data was initially pulled using the nhanesA package prior to the package being removed. The code below is kept as a reference and can be used again in the future if the package is made available.
NHANES data was loaded from two time periods 2015-2016 and 2021-2023 (the most recent survey available). This survey is used to assess the health of American children. Demographic, income, and laboratory (BLL) data was used in this study. Based on 2021 criteria from the CDC, we consider BLLs over 3.5 ug/dl as elevated.
# Import NHANES_I and NHANES_L (2015-2016 and 2021-2023): Demographics,
# Income, and Phlebotomy modules, and merge them into a single data.frame.
nhanes_modules <- c("DEMO", "PBCD", "INQ")
nhanes_cycles <- c("_I", "_L")

# For every module, download both cycles and stack them into one data.frame.
# lapply() always returns a list (sapply() may simplify its result), and the
# tables are passed along directly instead of being written to the workspace
# with assign() and then rediscovered with grep() over ls().
nhanes_by_module <- lapply(nhanes_modules, function(module) {
  cycle_tables <- lapply(
    paste0(module, nhanes_cycles),
    function(table_name) data.frame(nhanes(table_name))
  )
  # rbind.fill() keeps columns that exist in only one of the two cycles
  plyr::rbind.fill(cycle_tables)
})

# Join the stacked modules on SEQN, in the order listed in nhanes_modules
nhanes.data <- Reduce(
  function(left, right) full_join(left, right, by = "SEQN"),
  nhanes_by_module
)

# Remove only the temporaries created above; unrelated objects already in the
# workspace are left untouched.
rm(nhanes_modules, nhanes_cycles, nhanes_by_module)
names(nhanes.data) # confirm loaded correctly
# Limit the dataset to the necessary variables and to children under 10
# (the filter below is age < 10), then derive one-year age bands.
# Note: INDFMPIR is selected twice, as income_poverty and as IP_ratio.
nhanes.data.sub <- nhanes.data |>
  select(
    id = SEQN,
    survey = SDDSRVYR,
    gender = RIAGENDR,
    age = RIDAGEYR,
    race3 = RIDRETH3,
    birth_country = DMDBORN4,
    timeUS = DMDYRUSR,
    income_poverty = INDFMPIR,
    bll = LBXBPB,
    blldetected = LBDBPBLC,
    poverty_level = INDFMMPC,
    IP_ratio = INDFMPIR,
    cash = IND310,
    interview_weight = WTINT2YR,
    MEC_weight = WTMEC2YR,
    samplingunit = SDMVPSU,
    strata = SDMVSTRA
  ) |>
  mutate(
    # Gender arrives as text here; map it onto a two-level factor
    gender = factor(
      case_when(
        gender == "Male" ~ 0,
        gender == "Female" ~ 1
      ),
      levels = c(0, 1),
      labels = c("Male", "Female")
    ),
    # Survey cycle codes 9 and 12 are relabelled with their year ranges;
    # any other code becomes NA
    survey = factor(
      case_when(
        survey == 9 ~ 0,
        survey == 12 ~ 1
      ),
      levels = c(0, 1),
      labels = c("2015-2016", "2021-2023")
    )
  ) |>
  filter(age < 10) |>
  mutate(
    # One-year age bands. The 6-<7 band was previously missing, which left
    # every 6-year-old with agecat = NA.
    agecat = case_when(
      age < 2 ~ "1-<2",
      age >= 2 & age < 3 ~ "2-<3",
      age >= 3 & age < 4 ~ "3-<4",
      age >= 4 & age < 5 ~ "4-<5",
      age >= 5 & age < 6 ~ "5-<6",
      age >= 6 & age < 7 ~ "6-<7",
      age >= 7 & age < 8 ~ "7-<8",
      age >= 8 & age < 9 ~ "8-<9",
      age >= 9 & age < 10 ~ "9-<10"
    )
  )
# planning to relevel or mutate the other variables
head(nhanes.data.sub)
# Survey weighting (work in progress: not yet used in the Results).
# The MEC weight is halved before it is handed to the survey design.
nhanes.data.sub <- nhanes.data.sub |>
  mutate(adjusted_weight = MEC_weight / 2)

# Stratified cluster design: PSUs are nested within strata (nest = TRUE)
nhanes_design <- svydesign(
  id = ~samplingunit,
  strata = ~strata,
  weights = ~adjusted_weight,
  data = nhanes.data.sub,
  nest = TRUE
)
summary(nhanes_design)
# Create a dataset of children (already limited to age < 10 upstream) with a
# non-missing BLL and flag each result as elevated or not elevated.
# The cut-off is 3.5 ug/dL, matching the CDC 2021 criterion stated in the
# Methods text and the website-based version of this step further down
# (this block previously used 0.5).
nhanes.data.bll <- nhanes.data.sub |>
  filter(!is.na(bll)) |>
  mutate(
    elevated.bll = case_when(
      bll < 3.5 ~ "not elevated",
      bll >= 3.5 ~ "elevated"
    ),
    # Fix the level order so "not elevated" is the reference level
    elevated.bll = factor(
      elevated.bll,
      levels = c("not elevated", "elevated")
    )
  )
head(nhanes.data.bll)
table(nhanes.data.bll$survey, nhanes.data.bll$elevated.bll)
NHANES (data from website): Following removal of the nhanesA package, the data was pulled and loaded from files on the NHANES website. This data is also included in the repository.
# Load the individual files downloaded from the NHANES site.
# NHANES_I and NHANES_L (2015-2016 and 2021-2023): Demographics, Income, and
# Phlebotomy modules.
files <- c(
  "DEMO_I.xpt", "INQ_I.xpt", "PBCD_I.xpt",
  "DEMO_L.xpt", "INQ_L.xpt", "PBCD_L.xpt"
)

# Read every file into a list keyed by its file name
data_list <- lapply(files, read_xpt)
names(data_list) <- files

# Merge all modules belonging to one survey cycle.
#   pattern: regular expression matched against the file names in data_list
#            (e.g. "_I" or "_L").
# Returns one data frame containing the full join of the matching modules.
# The join key is stated explicitly as SEQN (the key dplyr previously reported
# when left to guess), so another column shared between modules can never
# silently become part of the key, and no join message is printed.
combine_year <- function(pattern) {
  files_subset <- grep(pattern, names(data_list), value = TRUE)
  reduce(data_list[files_subset], full_join, by = "SEQN")
}

# Combine datasets for "_I" and "_L"
data_I <- combine_year("_I")
data_L <- combine_year("_L")

# Stack the two cycles; columns present in only one cycle are filled with NA
nhanes.data <- bind_rows(data_I, data_L)
# Limit the dataset to the necessary variables and to children under 10
# (the filter below is age < 10), and recode the categorical variables.
nhanes.data.sub <- nhanes.data |>
  select(
    id = SEQN,
    survey = SDDSRVYR,
    gender = RIAGENDR,
    age = RIDAGEYR,
    race = RIDRETH3,
    hh_education = DMDHREDU,
    bll = LBXBPB,
    blldetected = LBDBPBLC,
    poverty_level = INDFMMPC,
    assets = IND310,
    interview_weight = WTINT2YR,
    lab_weight = WTPH2YR,
    MEC_weight = WTMEC2YR,
    samplingunit = SDMVPSU,
    strata = SDMVSTRA
  ) |>
  # Code 1 is labelled "Male"; every other non-missing code is "Female"
  mutate(gender = factor(ifelse(gender == 1, 0, 1),
    levels = c(0, 1),
    labels = c("Male", "Female")
  )) |>
  # Survey cycle codes 9 and 12 are relabelled with their year ranges
  mutate(survey = factor(
    case_when(
      survey == 9 ~ 0,
      survey == 12 ~ 1
    ),
    levels = c(0, 1),
    labels = c("2015-2016", "2021-2023")
  )) |>
  # Limit the data to children under 10, who are most at risk of developing
  # more severe outcomes long term
  filter(age < 10) |>
  # One-year age bands. The 6-<7 band was previously missing, which left
  # every 6-year-old with agecat = NA.
  mutate(agecat = case_when(
    age < 2 ~ "1-<2",
    age >= 2 & age < 3 ~ "2-<3",
    age >= 3 & age < 4 ~ "3-<4",
    age >= 4 & age < 5 ~ "4-<5",
    age >= 5 & age < 6 ~ "5-<6",
    age >= 6 & age < 7 ~ "6-<7",
    age >= 7 & age < 8 ~ "7-<8",
    age >= 8 & age < 9 ~ "8-<9",
    age >= 9 & age < 10 ~ "9-<10"
  )) |>
  mutate(race = factor(race,
    levels = c(1, 2, 3, 4, 6, 7),
    labels = c(
      "Mexican American",
      "Other Hispanic",
      "Non-Hispanic White",
      "Non-Hispanic Black",
      "Non-Hispanic Asian",
      "Other Race - Inc. Multi-Racial"
    )
  )) |>
  # NOTE(review): only codes 1-3 are labelled, so any other DMDHREDU code
  # becomes NA. Confirm against the codebook that this three-level coding
  # applies to both survey cycles.
  mutate(hh_education = factor(ifelse(hh_education == ".", NA, hh_education),
    levels = c(1, 2, 3),
    labels = c(
      "Less than high school degree",
      "High school/ some college",
      "College graduate or above"
    )
  )) |>
  # Codes 7, 9 and "." are treated as missing
  mutate(poverty_level = factor(
    ifelse(poverty_level %in% c(7, 9, "."), NA, poverty_level),
    levels = c(1, 2, 3),
    labels = c(
      "< 1.30",
      "1.31 - 1.85",
      " > 1.85"
    )
  )) |>
  # Codes 77, 99 and "." are treated as missing
  mutate(assets = factor(ifelse(assets %in% c(77, 99, "."), NA, assets),
    levels = c(1, 2, 3, 4, 5),
    labels = c(
      "Less than $3000",
      "$3001-$5000",
      "$5001-$10000",
      "$10001-$15000",
      "$15001-$20000"
    )
  ))
head(nhanes.data.sub) # review data cleaning
# A tibble: 6 × 16
id survey gender age race hh_education bll blldetected poverty_level
<dbl> <fct> <fct> <dbl> <fct> <fct> <dbl> <dbl> <fct>
1 83739 2015-20… Male 4 Non-… <NA> 0.29 0 " > 1.85"
2 83740 2015-20… Male 1 Othe… <NA> NA NA <NA>
3 83746 2015-20… Female 4 Non-… <NA> NA NA " > 1.85"
4 83748 2015-20… Male 3 Non-… <NA> NA NA "< 1.30"
5 83760 2015-20… Female 3 Non-… College gra… 0.44 0 "< 1.30"
6 83763 2015-20… Female 2 Othe… <NA> NA NA " > 1.85"
# ℹ 7 more variables: assets <fct>, interview_weight <dbl>, lab_weight <dbl>,
# MEC_weight <dbl>, samplingunit <dbl>, strata <dbl>, agecat <chr>
# Create a dataset with BLL values classified as elevated or not elevated,
# using the 3.5 ug/dL cut-off.
# NOTE(review): only rows with blldetected == 0 are kept; presumably 0 marks a
# result at or above the detection limit. Confirm that dropping the remaining
# rows is intended.
nhanes.data.bll <- nhanes.data.sub |>
  filter(blldetected == 0) |>
  mutate(
    elevated.bll = factor(
      if_else(bll >= 3.5, "elevated", "not elevated"),
      levels = c("not elevated", "elevated")
    )
  )
head(nhanes.data.bll)
# A tibble: 6 × 17
id survey gender age race hh_education bll blldetected poverty_level
<dbl> <fct> <fct> <dbl> <fct> <fct> <dbl> <dbl> <fct>
1 83739 2015-20… Male 4 Non-… <NA> 0.29 0 " > 1.85"
2 83760 2015-20… Female 3 Non-… College gra… 0.44 0 "< 1.30"
3 83772 2015-20… Female 2 Othe… <NA> 0.55 0 "< 1.30"
4 83780 2015-20… Male 4 Mexi… <NA> 0.96 0 " > 1.85"
5 83792 2015-20… Female 3 Mexi… College gra… 1.61 0 " > 1.85"
6 83797 2015-20… Female 1 Othe… <NA> 0.49 0 " > 1.85"
# ℹ 8 more variables: assets <fct>, interview_weight <dbl>, lab_weight <dbl>,
# MEC_weight <dbl>, samplingunit <dbl>, strata <dbl>, agecat <chr>,
# elevated.bll <fct>
# Cross-tabulate survey cycle (rows) against elevated-BLL status (columns)
table(nhanes.data.bll$survey, nhanes.data.bll$elevated.bll)
not elevated elevated
2015-2016 1443 28
2021-2023 645 4
# Weighting data: two survey cycles are combined, so the MEC weight is halved
# (per the NHANES instructions). Rows without a weight are dropped because
# they cannot enter the design.
# NOTE(review): the design is built on data that has already been filtered;
# confirm whether the design should instead be declared on the full sample
# and then subset.
nhanes.data.bll <- nhanes.data.bll |>
  mutate(adjusted_weight = MEC_weight / 2) |>
  filter(!is.na(adjusted_weight))

# Stratified cluster design with PSUs nested inside strata
nhanes_design <- svydesign(
  id = ~samplingunit,
  strata = ~strata,
  weights = ~adjusted_weight,
  data = nhanes.data.bll,
  nest = TRUE
)
summary(nhanes_design) #review the weighted data
Stratified 1 - level Cluster Sampling design (with replacement)
With (60) clusters.
svydesign(id = ~samplingunit, strata = ~strata, weights = ~adjusted_weight,
data = nhanes.data.bll, nest = TRUE)
Probabilities:
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.821e-05 7.797e-05 1.233e-04 1.277e-04 1.701e-04 3.764e-04
Stratum Sizes:
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 173 174
obs 50 142 85 81 91 69 75 143 86 115 110 134 135 114 41 40 38
design.PSU 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
actual.PSU 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
175 176 177 178 179 180 181 182 183 184 185 186 187
obs 46 41 40 41 54 36 47 53 37 42 37 33 64
design.PSU 2 2 2 2 2 2 2 2 2 2 2 2 2
actual.PSU 2 2 2 2 2 2 2 2 2 2 2 2 2
Data variables:
[1] "id" "survey" "gender" "age"
[5] "race" "hh_education" "bll" "blldetected"
[9] "poverty_level" "assets" "interview_weight" "lab_weight"
[13] "MEC_weight" "samplingunit" "strata" "agecat"
[17] "elevated.bll" "adjusted_weight"
ACS Data: The ACS data was loaded. The ACS collects a wide variety of data from Americans to inform policy. For this study, housing-related data from 2015 was collected to assess home ownership and the year a home was built. These are both important considerations given that many policies focus on lead remediation in rental units and properties built before 1978, when lead paint was banned in residential homes. A census API key was previously loaded and is not included in this document. The data was then cleaned and appropriate housing-related variables were identified.
# ACS housing variables, later used for mapping and regression.
# National ACS data by county, one row per county (wide format).
# NOTE(review): B25034_002/_003 are the two newest construction-period
# categories; their exact year ranges depend on the ACS vintage. Verify the
# labels for this request with load_variables(2015, "acs5").
acs.data.national <- get_acs(
  geography = "county",
  year = 2015,
  output = "wide",
  variables = c(
    "B25035_001E", # median year home built
    "B25036_001E", # total occupied
    "B25036_002E", # owner occupied
    "B25036_013E", # renter occupied
    "B25034_001E", # total homes
    "B25034_002E", # newest construction period
    "B25034_003E", # second-newest construction period
    "B25034_004E", # Built 2000 to 2009
    "B25034_005E", # Built 1990 to 1999
    "B25034_006E", # Built 1980 to 1989
    "B25034_007E", # Built 1970 to 1979
    "B25034_008E", # Built 1960 to 1969
    "B25034_009E", # Built 1950 to 1959
    "B25034_010E", # Built 1940 to 1949
    "B25034_011E"  # Built 1939 or earlier
  )
)
# Console message: Getting data from the 2011-2015 5-year ACS

# Derive housing-age and tenure percentages. "Built_before_1979" sums the
# 1970-1979 band and everything older, i.e. homes built in 1979 or earlier.
acs.data.national <- acs.data.national |>
  mutate(
    Built_after_1980 = B25034_002E + B25034_003E + B25034_004E +
      B25034_005E + B25034_006E,
    Built_before_1979 = B25034_007E + B25034_008E + B25034_009E +
      B25034_010E + B25034_011E,
    Percent_after_1980 = (Built_after_1980 / B25034_001E) * 100,
    Percent_before_1979 = (Built_before_1979 / B25034_001E) * 100,
    Percent_Owner_Occupied = (B25036_002E / B25036_001E) * 100,
    Percent_Renter_Occupied = (B25036_013E / B25036_001E) * 100
  )
# County polygons for mapping the national ACS data
us.counties1 <- counties(year = 2015, cb = TRUE, resolution = "500k")

# Drop the listed state FIPS codes (presumably Alaska, Hawaii and the
# territories; confirm) and attach the ACS housing measures by GEOID.
us.counties <- us.counties1 |>
  filter(!(STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))) |>
  left_join(acs.data.national, by = "GEOID")
# Philadelphia ACS data by census tract, one row per tract (wide format).
# NOTE(review): B25034_002/_003 are the two newest construction-period
# categories; their exact year ranges depend on the ACS vintage. Verify the
# labels for this request with load_variables(2015, "acs5").
acs.data.phl <- get_acs(
  geography = "tract",
  state = "PA",
  county = "Philadelphia",
  year = 2015,
  output = "wide",
  variables = c(
    "B25035_001E", # median year home built
    "B25036_001E", # total occupied
    "B25036_002E", # owner occupied
    "B25036_013E", # renter occupied
    "B25034_001E", # total homes
    "B25034_002E", # newest construction period
    "B25034_003E", # second-newest construction period
    "B25034_004E", # Built 2000 to 2009
    "B25034_005E", # Built 1990 to 1999
    "B25034_006E", # Built 1980 to 1989
    "B25034_007E", # Built 1970 to 1979
    "B25034_008E", # Built 1960 to 1969
    "B25034_009E", # Built 1950 to 1959
    "B25034_010E", # Built 1940 to 1949
    "B25034_011E"  # Built 1939 or earlier
  )
)
# Console message: Getting data from the 2011-2015 5-year ACS

# Derive housing-age and tenure percentages. "Built_before_1979" sums the
# 1970-1979 band and everything older, i.e. homes built in 1979 or earlier.
acs.data.phl <- acs.data.phl |>
  mutate(
    Built_after_1980 = B25034_002E + B25034_003E + B25034_004E +
      B25034_005E + B25034_006E,
    Built_before_1979 = B25034_007E + B25034_008E + B25034_009E +
      B25034_010E + B25034_011E,
    Percent_after_1980 = (Built_after_1980 / B25034_001E) * 100,
    Percent_before_1979 = (Built_before_1979 / B25034_001E) * 100,
    Percent_Owner_Occupied = (B25036_002E / B25036_001E) * 100,
    Percent_Renter_Occupied = (B25036_013E / B25036_001E) * 100
  )
OpenDataPhilly Lead Data: The data set includes the number of children screened for elevated blood lead levels (BLL) defined as >5ug/dL (based on the CDC’s criteria in 2012), the incidence of children with elevated BLL, and the percent screened with elevated BLL by census tract from 2013-2015. Values are missing where there are fewer than 6 observations for confidentiality purposes.
#OpenDataPhilly Data
# Read the tract-level lead screening CSV (first row holds the column names).
# NOTE(review): this is an absolute path on one machine; the script will not
# run elsewhere until it points at the copy of the file in the repository.
phllead <- read.csv("C:\\Users\\atrocle\\Documents\\.EPID 6000 Data Science\\Assignments\\Final Project\\opendataphl_lead_tract.csv", header = TRUE)
head(phllead) #confirm loading properly
census_tract data_redacted num_bll_5plus num_screen perc_5plus
1 4.2101e+10 false 0 100 0
2 4.2101e+10 true NA 109 NA
3 4.2101e+10 true NA 110 NA
4 4.2101e+10 true NA 61 NA
5 4.2101e+10 false 0 41 0
6 4.2101e+10 true NA 49 NA
#Load Tract Polygon data
# Read the Philadelphia census-tract polygons from the course repository.
# NOTE(review): the file name says 2020 tracts while the lead and ACS data
# are from 2013-2015; confirm the tract GEOIDs line up before joining.
philly.tracts <- read_rds("https://raw.githubusercontent.com/HimesGroup/BMIN503/master/DataFiles/philly.tracts.2020.rds")
names(philly.tracts)
 [1] "STATEFP" "COUNTYFP" "TRACTCE" "GEOID" "NAME" "NAMELSAD"
[7] "MTFCC" "FUNCSTAT" "ALAND" "AWATER" "INTPTLAT" "INTPTLON"
[13] "geometry"
# Give the lead data the same tract key as the polygons: rename census_tract
# to GEOID and store it as text so it can be matched in the joins below.
phllead <- phllead |>
  rename(GEOID = census_tract) |>
  mutate(across(GEOID, as.character))

# Start from the tract polygons and attach the lead screening data, then the
# Philadelphia ACS housing measures, both keyed on GEOID.
phllead_geo <- philly.tracts |>
  left_join(phllead, by = "GEOID") |>
  left_join(acs.data.phl, by = "GEOID")
EPA’s EJ Screen Dataset: The 2024 data was downloaded from the EPA’s website. The EJScreen data is a national dataset that combines environmental indicators with socioeconomic variables. The data can be used by researchers to inform policy related to environmental issues. In this study, we use demographic variables and justice indices related to lead. These indices combine proxies for exposures to lead paint, racial and income level variables.
# Load the EJScreen tract-level data from csv
ejscreen <- read.csv("C:\\Users\\atrocle\\Documents\\.EPID 6000 Data Science\\Assignments\\Final Project\\EJScreen_2024_Tract.csv", header = TRUE)

# Keep the lead-related and demographic columns, renaming each one
# (new name = original EJScreen column).
ejscreen_limit <- ejscreen |>
  select(all_of(c(
    GEOID = "ID",
    State = "STATE_NAME",
    County_name = "CNTY_NAME",
    Region = "REGION",
    Demographic_Index = "DEMOGIDX_2",
    Supplemental_Demographic_Index = "DEMOGIDX_5",
    Percent_POC = "PEOPCOLORPCT",
    Percent_Low_Income = "LOWINCPCT",
    Percent_Unemployed = "UNEMPPCT",
    Percent_Disabled = "DISABILITYPCT",
    Percent_Limited_English = "LINGISOPCT",
    Percent_No_HS_Education = "LESSHSPCT",
    Percent_Under5 = "UNDER5PCT",
    Percent_Over64 = "OVER64PCT",
    Percent_Low_Life_Expectancy = "LIFEEXPPCT",
    Lead_Paint_Percent = "PRE1960PCT",
    Lead_Paint_EJ_Index = "D2_LDPNT",
    Lead_Paint_Supp_Index = "D5_LDPNT"
  )))
# Restrict EJScreen to Philadelphia County, PA, and store the tract key as
# text so it matches the GEOID column of the polygon data.
ejscreen_phila <- ejscreen_limit |>
  filter(State == "PENNSYLVANIA", County_name == "Philadelphia County") |>
  mutate(across(GEOID, as.character))

# Attach the EJScreen measures to the OpenDataPhilly lead data
phllead_geo <- left_join(phllead_geo, ejscreen_phila, by = "GEOID")
names(phllead_geo)
 [1] "STATEFP" "COUNTYFP"
[3] "TRACTCE" "GEOID"
[5] "NAME.x" "NAMELSAD"
[7] "MTFCC" "FUNCSTAT"
[9] "ALAND" "AWATER"
[11] "INTPTLAT" "INTPTLON"
[13] "data_redacted" "num_bll_5plus"
[15] "num_screen" "perc_5plus"
[17] "NAME.y" "B25035_001E"
[19] "B25035_001M" "B25036_001E"
[21] "B25036_001M" "B25036_002E"
[23] "B25036_002M" "B25036_013E"
[25] "B25036_013M" "B25034_001E"
[27] "B25034_001M" "B25034_002E"
[29] "B25034_002M" "B25034_003E"
[31] "B25034_003M" "B25034_004E"
[33] "B25034_004M" "B25034_005E"
[35] "B25034_005M" "B25034_006E"
[37] "B25034_006M" "B25034_007E"
[39] "B25034_007M" "B25034_008E"
[41] "B25034_008M" "B25034_009E"
[43] "B25034_009M" "B25034_010E"
[45] "B25034_010M" "B25034_011E"
[47] "B25034_011M" "Built_after_1980"
[49] "Built_before_1979" "Percent_after_1980"
[51] "Percent_before_1979" "Percent_Owner_Occupied"
[53] "Percent_Renter_Occupied" "State"
[55] "County_name" "Region"
[57] "Demographic_Index" "Supplemental_Demographic_Index"
[59] "Percent_POC" "Percent_Low_Income"
[61] "Percent_Unemployed" "Percent_Disabled"
[63] "Percent_Limited_English" "Percent_No_HS_Education"
[65] "Percent_Under5" "Percent_Over64"
[67] "Percent_Low_Life_Expectancy" "Lead_Paint_Percent"
[69] "Lead_Paint_EJ_Index" "Lead_Paint_Supp_Index"
[71] "geometry"
Themes: Finally, themes for maps are loaded.
# Colour ramp built from the 9-class "BuPu" Brewer palette, for later maps
myPalette <- colorRampPalette(brewer.pal(9, "BuPu"))

# Shared ggplot2 theme for maps: theme_minimal() with the axes removed,
# white grid lines, and a larger legend.
# Takes no arguments and returns a theme object to add to a plot.
map_theme <- function() {
  no_element <- element_blank()
  legend_font <- element_text(size = 12)
  theme_minimal() +
    theme(
      axis.line = no_element,
      axis.text = no_element,
      axis.title = no_element,
      panel.grid = element_line(color = "white"),
      legend.key.size = unit(0.8, "cm"),
      legend.text = legend_font,
      legend.title = legend_font
    )
}
4 Results
4.1 National Lead Data:
We first analyze national data from NHANES to gain a broader understanding of blood lead levels (BLL) across the United States before narrowing our focus to Philadelphia.
4.1.1 Exploratory Analysis of BLLs in Children
We initially take a look at the housing stock nationally to identify areas of concern based on the prevalence of lead paint in older homes. This analysis shows a concentration of housing built before 1979 in the Midwest and Northeast, reinforcing the need to explore localized data in places like Philadelphia.
# Interactive county map of the percentage of housing built before 1979.
# Colour scale: continuous "BuPu" palette; the domain is left NULL so it is
# taken from the values passed in.
pal_fun <- colorNumeric("BuPu", NULL)

# Popup text per county: county name plus the rounded percentage
pu_message <- paste0(
  us.counties$NAME.y,
  "<br> Percent Built before 1979: ",
  round(us.counties$Percent_before_1979), "%"
)

# The extra addTiles() call was removed: it stacked the default tile layer
# on top of the CartoDB Positron basemap requested on the line below.
leaflet(us.counties) |>
  addProviderTiles(providers$CartoDB.Positron) |>
  addPolygons(
    stroke = TRUE,
    color = "gray",
    weight = 0.4,
    fillColor = ~ pal_fun(Percent_before_1979),
    fillOpacity = 0.8,
    smoothFactor = 0.5,
    popup = pu_message
  ) |>
  addLegend(
    "bottomright",
    pal = pal_fun,
    values = ~Percent_before_1979,
    title = "Built Before 1979 (%)",
    opacity = 1
  ) |>
  addScaleBar(position = "bottomleft")